import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import h5py
import pandas as pd
from utils.parquet import save_embeddings_to_parquet
# Convert the embeddings stored in an HDF5 file into a parquet file, attaching
# the image metadata (image_id, image_url, goods_id) listed in the CSV.
path = 'embeddings_o.h5'
csv_path = 'data/split_results/goods_images_eval_o.csv'

csv = pd.read_csv(csv_path)

# Records keyed by image_id; if the CSV repeats an image_id, the later row
# replaces the earlier one.
ans = {}

# Open the HDF5 file in a context manager so the handle is always released,
# including when a dataset lookup below raises for a missing key.
with h5py.File(path, 'r') as data:
    # Iterate the three columns directly rather than via DataFrame.iterrows():
    # it avoids building a Series per row and the dtype upcasting iterrows
    # can apply to row values.
    for raw_image_id, raw_image_url, raw_goods_id in zip(
        csv['image_id'], csv['image_url'], csv['goods_id']
    ):
        image_id = str(raw_image_id)
        image_url = str(raw_image_url)
        goods_id = str(raw_goods_id)

        # Datasets are named '<goods_id>_<image_id>'; `[:]` reads the whole
        # dataset into memory, so the value stays usable after the file closes.
        embedding = data[goods_id + '_' + image_id][:]

        ans[image_id] = {
            'image_id': image_id,
            'image_url': image_url,
            'goods_id': goods_id,
            'embedding': embedding,
        }

save_embeddings_to_parquet(ans, 'embeddings_o.parquet')